1 Preparations

1.1 Load libraries

1.2 Environment

col.os <- "#414a4c"  # Outer Space
col.rp <- "#7851a9"  # Royal Purple
col.cb <- "#b0b7c6"  # Cadet Blue
col.el <- "#ceff1d"  # Electric Lime
col.rm <- "#e3256b"  # Razzmatazz
scale.col.1 <- c()
theme_set(theme_minimal(base_family = "Candara"))
options(dplyr.summarise.inform = TRUE)

col.plos.yellow <- "#D6E13D"  # PLOS ONE
col.plos.pink <- "#CF00A3"  # PLOS ONE
scale.col.1 <- c()
font.1 <- "Candara"
theme_set(theme_minimal(base_family = font.1))
options(dplyr.summarise.inform = TRUE)

t.img <- png::readPNG("fig/twitter.png")
t.grob <- grid::rasterGrob(t.img, 
                           width = unit(0.4, "cm"), 
                           height = unit(0.4, "cm"),
                           interpolate = FALSE)

2 Load DataSets

https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-L01-v3_1.html

Land Price Open Dataset

L01_006/_100: Price (JPY/m\(^{2}\))
L01_007: Price Change (%)
L01_022: District Code
L01_023: City
L01_024: Address

admin.sf <- sf::read_sf(
    "input/Administrative_district_Tokyo_2021/N03-21_13_210101.shp"
) %>% rmapshaper::ms_filter_islands(min_area = 1e8)

lp.sf <- sf::read_sf("input/Land-Price_Tokyo_2022/L01-22_13.shp")

# print(st_crs(admin.sf))  # JGD2011
# print(st_crs(lp.sf))  # JGD2000

admin.sf <- admin.sf %>% st_transform("+proj=longlat +ellps=WGS84")
lp.sf <- lp.sf %>% st_transform("+proj=longlat +ellps=WGS84")
admin.code <- readxl::read_excel(
  "input/AdminiBoundary_CD.xlsx", skip = 1
) %>% rename("District" = "行政区域コード")

3 Preprocessing

3.1 Concat Polygons by District

admin.sf <- admin.sf %>% 
  rename(District = N03_007) %>% 
  select(District) %>% 
  group_by(District) %>% 
  summarise(.groups = "drop") %>% 
  left_join(admin.code %>% select(District, City), by = "District")

3.2 Transform and Rename

city.rm <- c(
    "Ooshima", 
    "Niijima", 
    "Kouzushima", 
    "Miyake", 
    "Hachijyou", 
    "Ogasawara"
)

lp.sf <- lp.sf %>% mutate(
    Price = L01_006 / 1e6,
    Price_Change = L01_007, 
) %>% rename(
    District = L01_022,
    # City = L01_023,
    Address = L01_024
) %>% 
    left_join(admin.code %>% select(District, City), by = "District") %>% 
    filter(!City %in% city.rm)

4 Exploratory Data Analysis

4.1 Descriptive Statistics

lp.sf.stat <- lp.sf %>% select(District, City, Price, Price_Change) %>% 
  group_by(District, City) %>% 
  summarise(
    Price.Mean = round(mean(Price, na.rm = TRUE), 3), 
    Price.Median = round(median(Price, na.rm = TRUE), 3), 
    Price.Std = round(sd(Price, na.rm = TRUE), 3), 
    Price_Change.Mean = round(mean(Price_Change, na.rm = TRUE), 3), 
    Price_Change.Median = round(median(Price_Change, na.rm = TRUE), 3), 
    Price_Change.Std = round(sd(Price_Change, na.rm = TRUE), 3), 
    .groups = "drop"
  ) %>% 
  st_set_geometry(NULL) %>% 
  left_join(admin.sf %>% select(District), by = "District") %>% 
  st_set_geometry(value = "geometry")

view.table(lp.sf.stat %>% st_set_geometry(NULL))

4.2 Price and Price_Change Distribution

p1 <- lp.sf %>% select(Price) %>% 
  ggplot() + 
  geom_histogram(mapping = aes(x = Price), binwidth = 0.2, 
                 fill = col.cb, color = "white") + 
  xlim(NA, 20) + 
  labs(x = NULL, y = NULL, title = expression("Price (1e6 JPY)"))

p2 <- lp.sf %>% select(Price_Change) %>% 
  ggplot() + 
  geom_histogram(mapping = aes(x = Price_Change), bins = 100, 
                 fill = col.cb, color = "white") + 
  labs(x = NULL, y = NULL, title = expression("Price Change (%)"))

p1 / p2

# boxplot

4.3 Choropleth Map: Tokyo

p1 <- lp.sf.stat %>% 
    ggplot() + 
    geom_sf(mapping = aes(), data = admin.sf, 
            fill = col.cb, color = col.cb, alpha = 0.1, size = 0.1) + 
    geom_sf(mapping = aes(fill = Price.Mean)) + 
    rcartocolor::scale_fill_carto_c(name = "", palette = "Magenta") + 
    geom_sf_text(aes(label = Price.Mean), color = "white", 
                 cex = 2.5, family = "Candara") + 
    labs(x = NULL, y = NULL, title = "Mean Price (1e6 JPY)") + 
    theme(title = element_text(size = 15), 
          legend.position = c(0.05, 0.35), 
          plot.margin = unit(c(0, 0, 30, 0), units = "pt")) + 
    guides(fill = guide_colourbar(
        barwidth = 0.5, barheight = 10, label.position = "right", 
        ticks.linewidth = 3
    ))

p2 <- lp.sf.stat %>% 
    ggplot() + 
    geom_sf(mapping = aes(), data = admin.sf, 
            fill = col.cb, color = col.cb, alpha = 0.1, size = 0.1) + 
    geom_sf(mapping = aes(fill = Price.Median)) + 
    rcartocolor::scale_fill_carto_c(name = "", palette = "Magenta") + 
    geom_sf_text(aes(label = Price.Median), color = "white", 
                 cex = 2.5, family = "Candara") + 
    labs(x = NULL, y = NULL, title = "Median Price (1e6 JPY)") + 
    theme(title = element_text(size = 15), 
          legend.position = c(0.05, 0.35), 
          plot.margin = unit(c(0, 0, 30, 0), units = "pt")) + 
    guides(fill = guide_colourbar(
        barwidth = 0.5, barheight = 10, label.position = "right", 
        ticks.linewidth = 3
    ))

p3 <- lp.sf.stat %>% 
    ggplot() + 
    geom_sf(mapping = aes(), data = admin.sf, 
            fill = col.cb, color = col.cb, alpha = 0.1, size = 0.1) + 
    geom_sf(mapping = aes(fill = Price_Change.Mean)) + 
    rcartocolor::scale_fill_carto_c(name = "", palette = "ArmyRose") + 
    geom_sf_text(aes(label = Price_Change.Mean), color = "white", 
                 cex = 2.5, family = "Candara") + 
    labs(x = NULL, y = NULL, title = "Mean Price Change (%)") + 
    theme(title = element_text(size = 15), 
          legend.position = c(0.05, 0.35), 
          plot.margin = unit(c(0, 0, 0, 0), units = "pt")) + 
    guides(fill = guide_colourbar(
        barwidth = 0.5, barheight = 10, label.position = "right", 
        ticks.linewidth = 3
    ))

p1 / p2 / p3

# ggsave("fig/Land-price_Tokyo.jpg", dpi = 300, width = 10, height = 15)

4.4 Choropleth Map: Central Tokyo

p1 <- lp.sf.stat %>% filter(District <= 13123) %>% 
    ggplot() + 
  geom_sf(mapping = aes(), data = admin.sf %>% filter(District <= 13123), 
          fill = col.cb, color = col.cb, alpha = 0.1, size = 0.1) + 
  geom_sf_text(aes(label = City), color = col.os, 
               cex = 4, family = "Candara") + 
  labs(x = NULL, y = NULL, title = "Central Tokyo Districts") + 
  theme(title = element_text(size = 15), 
        plot.margin = unit(c(0, 0, 40, 0), units = "pt"))

p2 <- lp.sf.stat %>% filter(District <= 13123) %>% 
  ggplot() + 
  geom_sf(mapping = aes(), data = admin.sf %>% filter(District <= 13123), 
          fill = NA, color = col.cb, alpha = 0.1, size = 0.1) + 
  geom_sf(mapping = aes(fill = Price.Median), color = "white") + 
  geom_sf_text(aes(label = Price.Median), color = col.os, 
               cex = 4, family = "Candara") + 
  rcartocolor::scale_fill_carto_c(name = "", palette = "Tropic") + 
  labs(x = NULL, y = NULL, title = "Median Land Price (1e6 JPY)") + 
  theme(title = element_text(size = 15), 
        legend.position = c(0.05, 0.3), 
        plot.margin = unit(c(0, 0, 40, 0), units = "pt")) + 
  guides(fill = guide_colourbar(
    barwidth = 0.5, barheight = 15, label.position = "right", 
    ticks.linewidth = 3
  ))

p3 <- lp.sf.stat %>% filter(District <= 13123) %>% 
  ggplot() + 
  geom_sf(mapping = aes(), data = admin.sf %>% filter(District <= 13123), 
          fill = NA, color = col.cb, alpha = 0.1, size = 0.1) + 
  geom_sf(mapping = aes(fill = Price_Change.Mean), color = "white") + 
  geom_sf_text(aes(label = Price_Change.Mean), color = col.os, 
               cex = 4, family = "Candara") + 
  rcartocolor::scale_fill_carto_c(name = "", palette = "ArmyRose") + 
  labs(x = NULL, y = NULL, title = "Mean Land Price Change (%)") + 
  theme(title = element_text(size = 15), 
        legend.position = c(0.05, 0.3), 
        plot.margin = unit(c(0, 0, 0, 0), units = "pt")) + 
  guides(fill = guide_colourbar(
    barwidth = 0.5, barheight = 15, label.position = "right", 
    ticks.linewidth = 3
  ))

p1 / p2 / p3

# ggsave("fig/Land-price_Central-Tokyo.jpg", dpi = 300, width = 10, height = 30)

5 Spatial Clustering with ClustGeo

5.1 Standardize Node Features

lp.stat.scale <- lp.sf.stat %>% 
    as.data.frame() %>% 
    select(-District, -City, -Price_Change.Median, -geometry) %>% 
    scale() %>% as.data.frame()

5.2 Compute Neighboorhod list with spdep::poly2nb

Works fine with MULTIPOLYGON, but not with NA.

lp.stat.nb <- spdep::poly2nb(lp.sf.stat %>% select(geometry))

5.3 Compute dissimilarities

A <- spdep::nb2mat(lp.stat.nb, style = "B")
diag(A) <- 1
colnames(A) <- rownames(A) <- rownames(lp.stat.scale)

5.4 Optimize alpha

D0 <- dist(lp.stat.scale)
D1 <- as.dist(1 - A)
alpha.seq <- seq(0, 1, 0.1)
K <- 5
cr <- choicealpha(D0, D1, alpha.seq, K, 
                  wt = NULL, scale = TRUE, graph = FALSE)
cr$Q  # proportion of explained inertia
##                  Q0        Q1
## alpha=0   0.8583256 0.2264027
## alpha=0.1 0.8212083 0.2812836
## alpha=0.2 0.7165387 0.3587420
## alpha=0.3 0.7165387 0.3587420
## alpha=0.4 0.7165387 0.3587420
## alpha=0.5 0.6658200 0.3609689
## alpha=0.6 0.6658200 0.3609689
## alpha=0.7 0.6658200 0.3609689
## alpha=0.8 0.4980573 0.3751056
## alpha=0.9 0.3581445 0.3792360
## alpha=1   0.3618860 0.3784321
crQ.df <- data.frame(cr$Qnorm) %>% mutate(alpha = alpha.seq)
rownames(crQ.df) <- NULL
crQ.df %>% ggplot() + 
  geom_point(mapping = aes(x = alpha, y = Q0norm), 
             color = col.plos.yellow, size = 2) + 
  geom_line(mapping = aes(x = alpha, y = Q0norm), 
            color = col.plos.yellow, size = 1) + 
  geom_point(mapping = aes(x = alpha, y = Q1norm), 
             color = col.plos.pink, size = 2) + 
  geom_line(mapping = aes(x = alpha, y = Q1norm), 
            color = col.plos.pink, size = 1) + 
  lims(y = c(0, NA)) + 
  labs(y = "Q: proportion of explained inertia", 
       title = "ClustGeo::choicealpha") + 
  theme(plot.title = element_text(size = 15))

5.5 Regionalization

tree <- 
  ClustGeo::hclustgeo(D0 = D0, D1 = D1, alpha = 0.2, scale = TRUE, wt = NULL)

5.5.1 ncluster = 3

lp.sf.stat <- lp.sf.stat %>% 
  mutate(partition.3 = factor(cutree(tree = tree, k = 3), 
                              levels = 1:3, labels = 1:3))
table(lp.sf.stat$partition.3)
## 
##  1  2  3 
##  5 22 24
p1 <- lp.sf.stat %>% 
    filter(!City %in% c("Oku-Tama", "Hinohara")) %>% 
    ggplot() + 
    geom_sf(mapping = aes(), 
            data = admin.sf %>% filter(!City %in% c("Oku-Tama", "Hinohara")), 
            fill = col.cb, color = col.cb, alpha = 0.1, size = 0.1) + 
    geom_sf(mapping = aes(fill = Price.Median)) + 
    rcartocolor::scale_fill_carto_c(name = "", palette = "Magenta") + 
    geom_sf_text(aes(label = Price.Median), color = "white",
                 cex = 3, family = "Candara") +
    labs(x = NULL, y = NULL, title = "Median Price (1e6 JPY)") + 
    theme(title = element_text(size = 15),
          legend.position = c(0.05, 0.35), 
          plot.margin = unit(c(0, 0, 20, 0), units = "pt")) + 
    guides(fill = guide_colourbar(
        barwidth = 0.5, barheight = 12, label.position = "right", 
        ticks.linewidth = 3
    ))

p2 <- lp.sf.stat %>% 
    filter(!City %in% c("Oku-Tama", "Hinohara")) %>% 
    ggplot() + 
    geom_sf(mapping = aes(), 
            data = admin.sf %>% filter(!City %in% c("Oku-Tama", "Hinohara")), 
            fill = col.cb, color = col.cb, alpha = 0.1, size = 0.1) + 
    geom_sf(mapping = aes(fill = Price_Change.Mean)) + 
    rcartocolor::scale_fill_carto_c(name = "", palette = "Magenta") + 
    geom_sf_text(aes(label = Price_Change.Mean), color = "white",
                 cex = 3, family = "Candara") +
    labs(x = NULL, y = NULL, title = "Mean Price Change (%)") + 
    theme(title = element_text(size = 15), 
          legend.position = c(0.05, 0.35), 
          plot.margin = unit(c(0, 0, 20, 0), units = "pt")) + 
    guides(fill = guide_colourbar(
        barwidth = 0.5, barheight = 12, label.position = "right", 
        ticks.linewidth = 3
    ))

p3 <- lp.sf.stat %>% ggplot() + 
    geom_sf(mapping = aes(fill = partition.3), size = 0.2, alpha = 0.5) + 
    scale_fill_carto_d(name = "", palette = "Vivid") + 
    geom_sf_text(aes(label = City), color = col.os, 
                 cex = 3, family = "Candara") + 
    labs(x = NULL, y = NULL, title = "ncluster = 3") + 
    theme(title = element_text(size = 15), 
          legend.position = "none", 
          plot.margin = unit(c(0, 0, 0, 0), units = "pt"))

p1 / p2 / p3 + 
    plot_annotation(
    title = "Spatial Clustering on Price Stats", 
    subtitle = "", 
    theme = theme(title = element_text(size = 20, family = "Candara"))
    )

ggsave("fig/Spatial-Clustering-Tokyo_ClustGeo_n3.jpg", 
       dpi = 300, width = 15, height = 10 * 3)

5.5.2 ncluster = 5

lp.sf.stat <- lp.sf.stat %>% 
  mutate(partition.5 = factor(cutree(tree = tree, k = 5), 
                              levels = 1:5, labels = 1:5))
table(lp.sf.stat$partition.5)
## 
##  1  2  3  4  5 
##  5 22 10  7  7
p1 <- lp.sf.stat %>% 
    filter(!City %in% c("Oku-Tama", "Hinohara")) %>% 
    ggplot() + 
    geom_sf(mapping = aes(), 
            data = admin.sf %>% filter(!City %in% c("Oku-Tama", "Hinohara")), 
            fill = col.cb, color = col.cb, alpha = 0.1, size = 0.1) + 
    geom_sf(mapping = aes(fill = Price.Median)) + 
    rcartocolor::scale_fill_carto_c(name = "", palette = "Magenta") + 
    geom_sf_text(aes(label = Price.Median), color = "white",
                 cex = 3, family = "Candara") +
    labs(x = NULL, y = NULL, title = "Median Price (1e6 JPY)") + 
    theme(title = element_text(size = 15),
          legend.position = c(0.05, 0.35), 
          plot.margin = unit(c(0, 0, 20, 0), units = "pt")) + 
    guides(fill = guide_colourbar(
        barwidth = 0.5, barheight = 12, label.position = "right", 
        ticks.linewidth = 3
    ))

p2 <- lp.sf.stat %>% 
    filter(!City %in% c("Oku-Tama", "Hinohara")) %>% 
    ggplot() + 
    geom_sf(mapping = aes(), 
            data = admin.sf %>% filter(!City %in% c("Oku-Tama", "Hinohara")), 
            fill = col.cb, color = col.cb, alpha = 0.1, size = 0.1) + 
    geom_sf(mapping = aes(fill = Price_Change.Mean)) + 
    rcartocolor::scale_fill_carto_c(name = "", palette = "Magenta") + 
    geom_sf_text(aes(label = Price_Change.Mean), color = "white",
                 cex = 3, family = "Candara") +
    labs(x = NULL, y = NULL, title = "Mean Price Change (%)") + 
    theme(title = element_text(size = 15), 
          legend.position = c(0.05, 0.35), 
          plot.margin = unit(c(0, 0, 20, 0), units = "pt")) + 
    guides(fill = guide_colourbar(
        barwidth = 0.5, barheight = 12, label.position = "right", 
        ticks.linewidth = 3
    ))

p3 <- lp.sf.stat %>% ggplot() + 
    geom_sf(mapping = aes(fill = partition.5), size = 0.2, alpha = 0.5) + 
    scale_fill_carto_d(name = "", palette = "Vivid") + 
    geom_sf_text(aes(label = City), color = col.os, 
                 cex = 3, family = "Candara") + 
    labs(x = NULL, y = NULL, title = "ncluster = 5") + 
    theme(title = element_text(size = 15), 
          legend.position = "none", 
          plot.margin = unit(c(0, 0, 0, 0), units = "pt"))

p1 / p2 / p3 + 
    plot_annotation(
    title = "Spatial Clustering on Price Stats", 
    subtitle = "", 
    theme = theme(title = element_text(size = 20, family = "Candara"))
    )

ggsave("fig/Spatial-Clustering-Tokyo_ClustGeo_n5.jpg", 
       dpi = 300, width = 15, height = 10 * 3)

5.5.3 ncluster = 10

lp.sf.stat <- lp.sf.stat %>% 
  mutate(partition.10 = factor(cutree(tree = tree, k = 10), 
                               levels = 1:10, labels = 1:10))
table(lp.sf.stat$partition.10)
## 
##  1  2  3  4  5  6  7  8  9 10 
##  5  7  3  4  4  4  5  5  7  7
p1 <- lp.sf.stat %>% ggplot() + 
    geom_sf(mapping = aes(fill = partition.3), size = 0.2, alpha = 0.5) + 
    scale_fill_carto_d(name = "", palette = "Vivid") + 
    geom_sf_text(aes(label = City), color = col.os, 
                 cex = 3, family = "Candara") + 
    labs(x = NULL, y = NULL, title = "ncluster = 3") + 
    theme(title = element_text(size = 15), 
          legend.position = "none", 
          plot.margin = unit(c(0, 0, 30, 0), units = "pt")) + 
  annotation_custom(grob = t.grob, 
                    xmin = 139.8, xmax = 139.8, 
                    ymin = 35.83, ymax = 35.83) + 
  annotate(geom = "text", x = 139.83, y = 35.83, 
           label = "@Maxwell_110", size = 3, family = "Candara", color = "gray")

p2 <- lp.sf.stat %>% ggplot() + 
    geom_sf(mapping = aes(fill = partition.5), size = 0.2, alpha = 0.5) + 
    scale_fill_carto_d(name = "", palette = "Vivid") + 
    geom_sf_text(aes(label = City), color = col.os, 
                 cex = 3, family = "Candara") + 
    labs(x = NULL, y = NULL, title = "ncluster = 5") + 
    theme(title = element_text(size = 15), 
          legend.position = "none", 
          plot.margin = unit(c(0, 0, 30, 0), units = "pt"))

p3 <- lp.sf.stat %>% ggplot() + 
    geom_sf(mapping = aes(fill = partition.10), size = 0.2, alpha = 0.5) + 
    scale_fill_carto_d(name = "", palette = "Vivid") + 
    geom_sf_text(aes(label = City), color = col.os, 
                 cex = 3, family = "Candara") + 
    labs(x = NULL, y = NULL, title = "ncluster = 10") + 
    theme(title = element_text(size = 15), 
          legend.position = "none", 
          plot.margin = unit(c(0, 0, 0, 0), units = "pt"))

p1 / p2 / p3 + 
    plot_annotation(
    title = "Spatial Clustering on Price Stats", 
    subtitle = "", 
    theme = theme(title = element_text(size = 20, family = "Candara"))
    )

ggsave("fig/Spatial-Clustering-Tokyo_ClustGeo_n3-10.jpg", 
       dpi = 300, width = 15, height = 10 * 3)